# coding: utf-8
import json
import logging
from datetime import datetime
from pyquery import PyQuery
from zc_core.pipelines.helper.catalog_helper import CatalogHelper
from zc_core.pipelines.helper.supplier_helper import SupplierHelper
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.model.items import Catalog, Supplier, ItemData, Sku
from zc_core.util.common import parse_number, parse_time
from zc_core.util.encrypt_util import md5, decode_base64
from zc_core.util.sku_id_parser import parse_sp_sku_id, convert_id2code

from henan.utils.spu_helper import SpuHelper
from henan.items import HenanOrder
from henan.matcher import *

logger = logging.getLogger('rules')


# 解析catalog列表
def parse_catalog(response):
    """Build a flat list of ``Catalog`` items from the category menu page.

    Three levels are collected in document order, each parent before its
    children. Level-1 and level-2 ids are the md5 of the category name;
    level-3 ids are taken from the link ``href`` via ``match_cat_id``.

    :param response: page response exposing the HTML as ``response.text``
    :return: list of ``Catalog`` items (levels 1, 2 and 3 mixed)
    """
    doc = PyQuery(response.text)
    catalogs = []

    for top_node in doc('div.category-layer div.list').items():
        top_name = top_node('dl.cat dt.cat-name a').attr('title').strip()
        top_id = md5(top_name)
        catalogs.append(_build_catalog(top_id, top_name, '', 1))

        # Iterating an empty selection is a no-op, so no emptiness guard is needed.
        second_level = top_node('div.categorys div.item-left div.subitems dl.fore1')
        for mid_node in second_level.items():
            mid_name = mid_node('dt a').attr('title').strip()
            mid_id = md5(mid_name)
            catalogs.append(_build_catalog(mid_id, mid_name, top_id, 2))

            for leaf_link in mid_node('dd a').items():
                leaf_name = leaf_link.attr('title').strip()
                leaf_id = match_cat_id(leaf_link.attr('href'))
                catalogs.append(_build_catalog(leaf_id, leaf_name, mid_id, 3))

    return catalogs


def _build_catalog(cat_id, cat_name, parent_id, level):
    """Create a ``Catalog`` item for one category node.

    :param cat_id: identifier stored as ``catalogId``
    :param cat_name: display name stored as ``catalogName``
    :param parent_id: id of the parent category ('' is passed for level 1)
    :param level: depth of the category (1, 2 or 3)
    :return: the populated ``Catalog``
    """
    entry = Catalog()
    entry['catalogId'] = cat_id
    entry['catalogName'] = cat_name
    entry['parentId'] = parent_id
    entry['level'] = level
    # Only third-level categories are flagged as leaves.
    entry['leafFlag'] = 1 if level == 3 else 0
    entry['linkable'] = 1

    return entry


# 解析supplier列表
def parse_supplier(response):
    """Extract suppliers from the rows of the ``table.layui-table`` table.

    The first row is skipped; for every other row the text of the first cell
    is used as the supplier name, and its md5 as the supplier id.

    :param response: page response exposing the HTML as ``response.text``
    :return: list of ``Supplier`` items
    """
    doc = PyQuery(response.text)
    rows = list(doc('table.layui-table tr').items())

    found = []
    # rows[0] is skipped (first table row).
    for row in rows[1:]:
        first_cell = row('td').eq(0)
        entry = Supplier()
        name = first_cell.text().strip()
        entry['id'] = md5(name)
        entry['name'] = name
        found.append(entry)

    return found


# sku：查询总页数
def parse_sku_page(response):
    """Return the total number of SKU list pages, or 0 when none is found.

    The text of the ``li.controls a`` pager links is handed to
    ``match_total_page`` and the extracted value is converted to ``int``.

    :param response: page response exposing the HTML as ``response.text``
    :return: total page count as ``int``; 0 if the matcher yields nothing
    :raises ValueError: if the matched text is not a valid integer
    """
    pyq = PyQuery(response.text)
    page_txt = pyq('li.controls a').text()
    # The matcher's no-match return value is not visible from this module;
    # `or ''` keeps a None result from raising on .strip() so the documented
    # "return 0" fallback is actually reachable.
    total_page = (match_total_page(page_txt) or '').strip()

    return int(total_page) if total_page else 0


# 解析sku列表
def parse_sku_data(response):
    """Parse one SKU list page into parallel ``Sku`` and ``ItemData`` lists.

    Product nodes (``ul.list-grid li.item``) are paired positionally with the
    prices returned by ``match_prices``. If the two counts differ the page is
    treated as inconsistent: an error is logged and two empty lists are
    returned. Nodes without a link id are skipped, and only entries with a
    positive sale price are kept.

    :param response: list-page response; ``response.meta['catalogId']`` is
        stored as the level-3 catalog id of every entry
    :return: tuple ``(sku_list, item_list)``
    """
    sp_helper = SupplierHelper()
    cat_helper = CatalogHelper()
    spu_helper = SpuHelper()
    cat_id = response.meta.get('catalogId')
    jpy = PyQuery(response.text)

    sku_list = list()
    item_list = list()
    node_list = jpy('ul.list-grid li.item')
    # Normalise a falsy matcher result (e.g. None) so len() below is always safe.
    price_list = match_prices(response.text) or []

    node_cnt = len(node_list)
    price_cnt = len(price_list)
    if node_cnt != price_cnt:
        # Report the number of product nodes found on the page; the result
        # list is still empty at this point and would always print 0.
        logger.error('数据异常: sku_cnt=%s, price_cnt=%s', node_cnt, price_cnt)
        return sku_list, item_list

    for node, price in zip(node_list.items(), price_list):
        link = node('div.item-info div.item-name a')
        link_id = match_sku_id(link.attr('href'))
        if not link_id:
            continue
        # The sku id is the base64-decoded link id.
        sku_id = decode_base64(link_id)
        spu_id = spu_helper.get_spu_id(sku_id)
        com = node('div.item-info div.item-company > em.layui-elip')
        sold_count = parse_number(node('div.item-info div.icon_ds_num em.sale-count a').text().strip(), 0)
        img = node('div.item-pic img').attr('data-original')

        sku = Sku()
        data = ItemData()
        sku['skuId'] = sku_id
        data['skuId'] = sku_id
        if spu_id:
            sku['spuId'] = spu_id
            data['spuId'] = spu_id
        sku['linkId'] = link_id
        data['linkId'] = link_id
        data['skuName'] = link.attr('title')
        data['catalog3Id'] = cat_id
        sku['catalog3Id'] = cat_id
        cat_helper.fill(data)
        sku['soldCount'] = int(sold_count)
        data['soldCount'] = int(sold_count)
        data['skuImg'] = img

        if price:
            price_num = parse_number(price)
            sku['salePrice'] = price_num
            sku['originPrice'] = price_num
            data['salePrice'] = price_num
            data['originPrice'] = price_num
        if com:
            # Drop nested <span> elements so only the company name text remains.
            com.remove('span')
            sp_name = com.text().strip()
            sp_id = sp_helper.get_id_by_name(sp_name)
            sku['supplierName'] = sp_name
            data['supplierName'] = sp_name
            sku['supplierId'] = sp_id
            data['supplierId'] = sp_id

        # Keep valid products only; `or 0` guards against a None price,
        # which cannot be compared with an int.
        if (data.get('salePrice') or 0) > 0:
            sku_list.append(sku)
            item_list.append(data)

    return sku_list, item_list


# Parse the item (SKU detail) page
def parse_item_data(response):
    """Parse a product detail page into a single ``ItemData``.

    ``batchNo`` and ``skuId`` come from ``response.meta``; name and brand are
    read from the HTML, price and spu id are extracted from the raw page text
    by ``match_price_id`` / ``match_spu_id``. Price and brand fields are only
    set when a truthy value was found.

    :param response: detail-page response (uses ``.meta`` and ``.text``)
    :return: the populated ``ItemData``
    """
    meta = response.meta
    page_text = response.text
    doc = PyQuery(page_text)

    # Nested <span> elements are removed from the heading before reading its text.
    title = doc('h1.goods-name').remove('span').text().strip()
    brand_text = doc('div.freight div.dt:contains("品牌") + div.dd span.freight-info').text()
    raw_price = match_price_id(page_text)
    spu = match_spu_id(page_text)

    detail = ItemData()
    detail['batchNo'] = meta.get('batchNo')
    detail['skuId'] = meta.get('skuId')
    detail['spuId'] = spu
    detail['skuName'] = title

    price_value = parse_number(raw_price)
    if price_value:
        detail['salePrice'] = price_value
        detail['originPrice'] = price_value

    if brand_text:
        brand_name = brand_text.strip()
        detail['brandId'] = md5(brand_name)
        detail['brandName'] = brand_name

    return detail


# =============================================
# 解析order列表页数
def parse_total_page(response):
    """Return the page count of the order list.

    The number is read from the text of the element immediately preceding
    ``a.nextPag`` inside ``div.page_wrap``.

    :param response: list-page response exposing the HTML as ``response.text``
    :return: page count as ``int``
    :raises ValueError: if that text is empty or not an integer
    """
    doc = PyQuery(response.text)
    before_next = doc('div.page_wrap a.nextPag').prev()
    page_text = before_next.text().strip()

    return int(page_text)


# 解析order列表
def parse_order(response):
    """Parse the order list table into ``HenanOrder`` items.

    Every row of ``table.clinch_success tbody`` except the first one is
    converted with ``_build_order``.

    :param response: list-page response exposing the HTML as ``response.text``
    :return: list of orders in table order
    """
    doc = PyQuery(response.text)
    rows = doc('table.clinch_success tbody tr').items()

    # Position 0 is skipped (first table row).
    return [_build_order(row) for pos, row in enumerate(rows) if pos > 0 and row]


def _build_order(tr):
    """Convert one order-list table row into a ``HenanOrder``.

    The encoded id is extracted from the ``onclick`` attribute of the row's
    link; its base64-decoded form becomes the order id, while the encoded form
    is used to build the detail URL. Title, area and time are read from cells
    1, 4 and 5 respectively.

    :param tr: PyQuery selection for a single ``<tr>``
    :return: the populated ``HenanOrder``
    """
    cells = tr('td')
    encoded_id = match_order_id(cells('a').attr('onclick'))
    decoded_id = decode_base64(encoded_id.encode('utf-8'))
    placed_at = parse_time(cells.eq(5).text().strip(), fmt='%Y-%m-%d %H:%M:%S')

    order = HenanOrder()
    order['id'] = decoded_id
    order['url'] = 'http://222.143.21.205:8081/policy/articleDetail?id={}'.format(encoded_id)
    order['orderTime'] = placed_at
    order['batchNo'] = time_to_batch_no(placed_at)
    order['title'] = cells.eq(1).text().strip()
    order['area'] = cells.eq(4).text().strip()
    order['genTime'] = datetime.utcnow()

    return order


# 解析order_item列表
def parse_order_item(response):
    """Parse an order detail page into a list of order-line dicts.

    Order-level fields (purchaser, supplier, supplier order code) are read
    once from the announcement block and copied onto every line. Table rows
    with fewer than four cells are skipped. Each line id is
    ``<orderId>_<row index>``, where the index counts all rows, including
    skipped ones.

    :param response: detail-page response; ``meta`` supplies ``orderId`` and
        ``orderTime``
    :return: list of ``dict`` objects, one per order line
    """
    meta = response.meta
    order_id = meta.get('orderId')
    order_time = meta.get('orderTime')
    doc = PyQuery(response.text)

    buyer = doc('div.annoucement ul li span:contains("采购人：") + em').text()
    vendor = doc('div.annoucement ul li span:contains("成交电商：") + em').text()
    vendor_order_code = doc('div.annoucement ul li span:contains("电商订单编号：") + em').text()

    lines = []
    for pos, row in enumerate(doc('table.layui-table tbody tr').items()):
        cells = row('td')
        # An empty selection has length 0, so this also covers rows without cells.
        if len(cells) < 4:
            continue

        lines.append({
            'id': order_id + '_' + str(pos),
            'orderId': order_id,
            'orderCode': vendor_order_code,
            'orderDept': buyer,
            'supplierName': vendor,
            'orderTime': order_time,
            'skuName': cells.eq(1).text().strip(),
            'catalogBrand': cells.eq(2).text().strip(),
            'count': round(parse_number(cells.eq(3).text().strip())),
            'unitPrice': parse_number(cells.eq(4).text().strip()),
            'amount': parse_number(cells.eq(6).text().strip()),
            'batchNo': time_to_batch_no(order_time),
            'genTime': datetime.utcnow(),
        })

    return lines


# sku：解析价格阶梯
def parse_price_ladder(response):
    """Return the stripped text of every price-filter label on the page.

    Labels are the ``em`` elements under ``ul[data-name="jiage"] li.radio_the``.

    :param response: page response exposing the HTML as ``response.text``
    :return: list of label strings in document order
    """
    doc = PyQuery(response.text)
    label_nodes = doc('ul[data-name="jiage"] li.radio_the em')

    return [node.text().strip() for node in label_nodes.items()]


# 解析same sku列表
def parse_same_list(response):
    """Parse a JSON page of "same product" rows into ``Sku`` / ``ItemData`` lists.

    The payload is expected to be an object with ``totalPages`` and a
    ``content`` array of rows. Each row yields one ``Sku`` and one
    ``ItemData``; brand, supplier and supplier-link fields are only set when
    present in the row.

    :param response: JSON response; ``response.meta['catalog3Id']`` is stored
        as the level-3 catalog id of every entry
    :return: tuple ``(skus, items, total_page)``
    """
    cat_helper = CatalogHelper()
    cat3_id = response.meta.get('catalog3Id')
    skus = list()
    items = list()
    total_page = 0
    js = json.loads(response.text)
    if not js:
        return skus, items, total_page

    # dict.get(key, default) only falls back when the key is ABSENT; a JSON
    # null arrives as None, so `or` is used wherever None would break later code.
    total_page = js.get('totalPages') or 0
    rows = js.get('content') or []
    for row in rows:
        sku = Sku()
        sku['skuId'] = row.get('mongoId')
        sku['spuId'] = row.get('xhbh')
        sku['salePrice'] = row.get('sjjg')
        sku['catalog3Id'] = cat3_id
        sku['soldCount'] = row.get('volumeCount')
        skus.append(sku)

        item = ItemData()
        item['skuId'] = row.get('mongoId')
        item['spuId'] = row.get('xhbh')
        item['skuName'] = row.get('xhmc')
        item['skuImg'] = row.get('sltp', '')
        item['salePrice'] = row.get('sjjg')
        item['originPrice'] = row.get('sjjg')
        # Level-3 catalog
        item['catalog3Id'] = cat3_id
        cat_helper.fill(item)
        item['soldCount'] = row.get('volumeCount')

        brand = row.get('ppmc') or ''
        if brand:
            brand = brand.strip()
            item['brandId'] = md5(brand)
            item['brandName'] = brand

        # Must be a str even when the field is null: it is searched with `in` below.
        supplier = row.get('dsmc') or ''
        if supplier:
            item['supplierId'] = md5(supplier.strip())
            item['supplierName'] = supplier.strip()

        sp_sku_link = row.get('productlink') or ''
        if sp_sku_link:
            # Repair links that carry an extra slash after the scheme.
            if 'http:///' in sp_sku_link:
                sp_sku_link = sp_sku_link.replace('http:///', 'http://')
            item['supplierSkuLink'] = sp_sku_link
            sp_sku_id = parse_sp_sku_id(sp_sku_link)
            if sp_sku_id:
                item['supplierSkuId'] = sp_sku_id
                sp_sku_code = sp_sku_id
                # Suppliers whose name contains '得力' get their id converted to a code.
                if '得力' in supplier:
                    sp_sku_code = convert_id2code('deli', sp_sku_id)
                item['supplierSkuCode'] = sp_sku_code

        item['onSaleTime'] = parse_time(row.get('gxsj'), fmt='%Y-%m-%d %H:%M:%S.%f')
        item['genTime'] = datetime.utcnow()
        items.append(item)

    return skus, items, total_page
